# -*- coding: utf-8 -*-
import scrapy
from ippool.items import IppoolItem

class IpSpider(scrapy.Spider):
    """Spider that harvests free proxy servers from xicidaili.com.

    For each listing page it returns a single ``IppoolItem`` whose fields are
    parallel lists (one entry per table row) — IP, port, location, proxy type,
    speed, and last-check time — preserving the original output shape.
    """

    name = "ip"
    allowed_domains = ["xicidaili.com"]
    start_urls = ['http://xicidaili.com/']

    # Inclusive range of listing pages to crawl (the "nn" high-anonymity
    # section). Exposed as class attributes so the crawl depth can be tuned
    # without editing start_requests; defaults reproduce range(2, 200).
    first_page = 2
    last_page = 199

    def start_requests(self):
        """Lazily yield one request per listing page.

        A generator avoids pre-building the full request list in memory and
        is the idiomatic Scrapy form; the scheduler consumes it the same way.
        """
        for page in range(self.first_page, self.last_page + 1):
            yield scrapy.Request('http://www.xicidaili.com/nn/%s' % page)

    def parse(self, response):
        """Extract the proxy table's columns into one item of parallel lists.

        Returns:
            IppoolItem: fields are lists aligned by table row.

        NOTE(review): if any row is missing the ``td[4]/a`` position link,
        the POSITION list comes out shorter than the others and the columns
        silently misalign — confirm against live markup; the robust fix is
        yielding one item per ``<tr>``, which would change the item shape
        downstream pipelines consume, so it is not done here.
        """
        item = IppoolItem()
        row = "//*[@id='ip_list']/tr"  # shared prefix, hoisted for clarity
        item["IP"] = response.xpath(row + "/td[2]/text()").extract()
        item["PORT"] = response.xpath(row + "/td[3]/text()").extract()
        item["POSITION"] = response.xpath(row + "/td[4]/a/text()").extract()
        item["TYPE"] = response.xpath(row + "/td[6]/text()").extract()
        item["SPEED"] = response.xpath(row + "/td[7]/div/@title").extract()
        item["LAST_CHECK_TIME"] = response.xpath(row + "/td[10]/text()").extract()
        return item
